#!/bin/bash
########################################################################
# (C) Copyright 2008 Muhammed Uluyol
########################################################################
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
########################################################################
#
# This script generates a book database from the
# website http://www.iblist.com/

# Load the configuration from the current directory.
# Abort when it is missing: the rest of the script uses DB_DIR, which is
# not assigned anywhere in this file (presumably lib_helper.cfg sets it),
# so carrying on without the config would run with it unset.
if [ ! -f lib_helper.cfg ]; then
	echo "CANT FIND CONFIG" >&2
	exit 1
fi
source lib_helper.cfg

# make our list
#
# Fetches the iblist.com book index and prints one line per link whose
# target starts with "book<digit>". Each line has the form
#     bookNNN.htm">Title_With_Underscores
# i.e. everything up to and including href=" is removed, everything from
# </a> onwards is removed, and spaces are replaced by underscores.
#
# The URL and every pattern are quoted so that the shell cannot treat
# "?" or "[0-9]" as pathname expansion (a file called e.g. "book9" in the
# current directory would otherwise silently change the grep pattern).
#
# NOTE(review): the last expression, 's|htm*|htm|g', matches "ht" followed
# by any number of "m", so it also rewrites "ht" inside titles to "htm".
# It is kept unchanged here because get_files only uses the part of the
# line before '">' -- confirm the intent before touching it.
get_list()
{
	curl 'http://www.iblist.com/list.php?type=book' 2>/dev/null \
		| grep -i a \
		| grep 'book[0-9]' \
		| sed -e 's|.*href="book|book|i' \
		      -e 's|</a>.*||i' \
		      -e 's| |_|g' \
		      -e 's|htm*|htm|g'
}

# get our files
#
# Reads entries from stdin, one per line, downloads each one from the top
# url into the target directory, then deletes stray downloads.
# For every line, everything from the first '">' onwards is dropped and
# the remainder is used both as the local file name and as the path
# appended to the top url.
#
# $1 == dir to place
# $2 == top url
get_files()
{
	local bookurl book_name waste

	[ -d "$1" ] || mkdir -p -- "$1" || return 1

	# -r keeps backslashes in the scraped text literal.
	while read -r bookurl; do
		# Same result as sed 's|">.*||', without spawning a process or
		# exposing the line to word splitting and globbing.
		book_name=${bookurl%%'">'*}
		case $book_name in
			'')
				# Blank line: nothing to fetch.
				continue
				;;
			*/*)
				# The name comes from a remote page; never let it
				# address anything outside of $1.
				echo "* Skipping suspicious name $book_name" >&2
				continue
				;;
		esac
		echo "* Downloading $book_name"
		curl --silent --output "$1/$book_name" "$2/$book_name"
	done

	# Remove every entry whose name has at least one character after
	# "htm" (the glob form of the former `ls | grep htm..*`). When nothing
	# matches, the pattern stays literal and rm -f ignores it.
	for waste in "$1"/*htm?*; do
		rm -rf -- "$waste"
	done
}

# convert html into db file
#
# For every regular file in $2 whose name contains "htm", writes
# $1/<name>.bk, where <name> is the file name with its first ".htm"
# removed and every "book" replaced by "ibdb" (book12.htm -> ibdb12.bk).
# Each .bk file holds four lines scraped from the page:
#   TITLE   - text after "<h2>Book Information: " up to "</h2>"
#   AUTHOR  - link text of lines containing "by <" (lines with <li> are
#             ignored)
#   SUMMARY - leading text of the <div class="indent"> line(s)
#   TAGS    - the "Genre:" line with markup and spaces removed and
#             "&rarr;" turned into ","
#
# $1 == dir to place
# $2 == dir to search
convert_html_db()
{
	local page html out_name title author summary tags

	[ -d "$1" ] || mkdir -p -- "$1" || return 1

	# Glob instead of parsing `ls`, so names and directories containing
	# whitespace survive intact.
	for page in "$2"/*htm*; do
		# Skips the literal pattern when nothing matched.
		[ -f "$page" ] || continue

		html=${page##*/}
		# Literal ".htm" (the former sed used an unescaped dot).
		out_name=${html/.htm/}
		out_name=${out_name//book/ibdb}.bk
		echo "* Generating $out_name"

		# Anchor on the full marker: stripping up to the last ": " would
		# cut titles that themselves contain ": ".
		title=$(grep "<h2>Book Information: " <"$page" \
			| sed -e 's|.*<h2>Book Information: ||' -e 's:</h2>::')

		author=$(sed 's|.*<li>.*||' <"$page" \
			| grep "by <" \
			| sed -e 's:.*htm">::' -e 's|<.*||')

		summary=$(grep 'div class="indent">' <"$page" \
			| sed -e 's|<.>||g' \
			      -e 's|.*<div class="indent">||' \
			      -e 's|</div>||' \
			      -e 's|<.*||g' \
			| grep -v "^$")

		tags=$(grep Genre: <"$page" \
			| sed -e 's| ||g' \
			      -e 's|.*<i>Genre:</i>||' \
			      -e 's|<ahref=\"http://www.iblist.com/list.php?type=book&by=genre&genre=[0-9]*||g' \
			      -e 's|&rarr;|,|g' \
			      -e 's|>||g' -e 's|<||g' \
			      -e 's|/a||g' -e 's|br/||g' \
			      -e 's:"::g')

		printf 'TITLE: %s\nAUTHOR: %s\nSUMMARY: %s\nTAGS: %s\n' \
			"$title" "$author" "$summary" "$tags" >"$1/$out_name"
	done
}

# Run the whole job: scrape the index, download every page into
# ibdb_html, then convert the pages into .bk files under $DB_DIR.
# DB_DIR is checked before any download starts: unquoted and empty it
# would vanish from the argument list and ibdb_html would be taken as the
# output directory instead.
: "${DB_DIR:?DB_DIR is not set (expected from lib_helper.cfg or the environment)}"
get_list | get_files ibdb_html http://www.iblist.com
convert_html_db "$DB_DIR" ibdb_html

